In this project, multivariate time series data collected by a single three-axis accelerometer are manipulated and visualized. The data contain the gesture patterns of eight different users (referred to as “classes” from now on) and consist of over 4000 instances. The data for the X, Y, and Z axes are provided separately in the following link.
The primary aim is to visualize one instance from each class after manipulating the data properly. Then, two alternative time series representation methods are applied and the resulting plots are compared.
library(data.table)
library(ggplot2)
# Directory that holds the three training files. Change this single value to
# run the analysis from another location.
data_dir <- "C:/Users/AHMET/Desktop"

# The data is imported: one table per accelerometer axis.
x <- fread(file.path(data_dir, "uWaveGestureLibrary_X_TRAIN"))
y <- fread(file.path(data_dir, "uWaveGestureLibrary_Y_TRAIN"))
z <- fread(file.path(data_dir, "uWaveGestureLibrary_Z_TRAIN"))

# ID variable is added to each axis data (the row number within its file).
# seq_len(.N) is used instead of 1:.N so that an empty table gets an empty id
# column instead of the two-element sequence c(1, 0).
x[, id := seq_len(.N)]
y[, id := seq_len(.N)]
z[, id := seq_len(.N)]

# Class information is added: the first column read by fread (V1) is renamed.
setnames(x, "V1", "Class")
setnames(y, "V1", "Class")
setnames(z, "V1", "Class")
# Each data table is melted for better visual representation.
# X axis: one row per (id, Class, time) with the measured value.
long_train_x <- melt(
  x,
  id.vars = c("id", "Class"),
  variable.name = "variable",
  value.name = "value"
)
# The digits in the melted column name, minus one, give the time index.
long_train_x[, time := as.numeric(gsub("\\D", "", variable)) - 1]
long_train_x <- long_train_x[, .(id, Class, time, value)]
# Sort by class, then instance, then time.
setorder(long_train_x, Class, id, time)
setnames(long_train_x, old = "value", new = "AccelerationX")
head(long_train_x)
## id Class time AccelerationX
## 1: 11 1 1 -0.7914472
## 2: 11 1 2 -0.7914472
## 3: 11 1 3 -0.7958727
## 4: 11 1 4 -0.8100650
## 5: 11 1 5 -0.8492300
## 6: 11 1 6 -0.9034648
# Y axis: one row per (id, Class, time) with the measured value.
long_train_y <- melt(
  y,
  id.vars = c("id", "Class"),
  variable.name = "variable",
  value.name = "value"
)
# The digits in the melted column name, minus one, give the time index.
long_train_y[, time := as.numeric(gsub("\\D", "", variable)) - 1]
long_train_y <- long_train_y[, .(id, Class, time, value)]
# Sort by class, then instance, then time.
setorder(long_train_y, Class, id, time)
setnames(long_train_y, old = "value", new = "AccelerationY")
head(long_train_y)
## id Class time AccelerationY
## 1: 11 1 1 -1.959984
## 2: 11 1 2 -1.959984
## 3: 11 1 3 -1.956600
## 4: 11 1 4 -1.945750
## 5: 11 1 5 -1.915809
## 6: 11 1 6 -1.874347
# Z axis: one row per (id, Class, time) with the measured value.
long_train_z <- melt(
  z,
  id.vars = c("id", "Class"),
  variable.name = "variable",
  value.name = "value"
)
# The digits in the melted column name, minus one, give the time index.
long_train_z[, time := as.numeric(gsub("\\D", "", variable)) - 1]
long_train_z <- long_train_z[, .(id, Class, time, value)]
# Sort by class, then instance, then time.
setorder(long_train_z, Class, id, time)
setnames(long_train_z, old = "value", new = "AccelerationZ")
head(long_train_z)
## id Class time AccelerationZ
## 1: 11 1 1 -0.2490781
## 2: 11 1 2 -0.2490781
## 3: 11 1 3 -0.2514390
## 4: 11 1 4 -0.2590101
## 5: 11 1 5 -0.2799033
## 6: 11 1 6 -0.3088358
In this part, to be able to visualize the gesture instances, the location data for each axis are obtained by taking cumulative sums of the speed data. The speed data for each axis are obtained in the same way, that is, by taking cumulative sums of the acceleration data.
# Combine the three axes into one table. The Y and Z columns are attached by
# row position, so first verify that all three long tables list the same
# (id, time) pairs in the same order.
stopifnot(
  identical(long_train_x$id, long_train_y$id),
  identical(long_train_x$id, long_train_z$id),
  identical(long_train_x$time, long_train_y$time),
  identical(long_train_x$time, long_train_z$time)
)
table_xyz <- copy(long_train_x)
table_xyz[, AccelerationY := long_train_y$AccelerationY]
table_xyz[, AccelerationZ := long_train_z$AccelerationZ]

# Speed is the cumulative sum of acceleration and location is the cumulative
# sum of speed. Both sums are taken within each instance (by = id): every id is
# a separate recording, so its running totals must restart at its first time
# step. Summing over the whole table instead lets each instance inherit the
# accumulated totals of all rows sorted before it, which shifts its location by
# an arbitrary offset. Rendered outputs in this document that show locations in
# the hundreds of thousands at time 1 were produced by that ungrouped version.
table_xyz[
  ,
  `:=`(
    SpeedX = cumsum(AccelerationX),
    SpeedY = cumsum(AccelerationY),
    SpeedZ = cumsum(AccelerationZ)
  ),
  by = id
]
table_xyz[
  ,
  `:=`(
    LocationX = cumsum(SpeedX),
    LocationY = cumsum(SpeedY),
    LocationZ = cumsum(SpeedZ)
  ),
  by = id
]
head(table_xyz)
## id Class time AccelerationX AccelerationY AccelerationZ SpeedX
## 1: 11 1 1 -0.7914472 -1.959984 -0.2490781 -0.7914472
## 2: 11 1 2 -0.7914472 -1.959984 -0.2490781 -1.5828944
## 3: 11 1 3 -0.7958727 -1.956600 -0.2514390 -2.3787671
## 4: 11 1 4 -0.8100650 -1.945750 -0.2590101 -3.1888321
## 5: 11 1 5 -0.8492300 -1.915809 -0.2799033 -4.0380621
## 6: 11 1 6 -0.9034648 -1.874347 -0.3088358 -4.9415269
## SpeedY SpeedZ LocationX LocationY LocationZ
## 1: -1.959984 -0.2490781 -0.7914472 -1.959984 -0.2490781
## 2: -3.919968 -0.4981562 -2.3743415 -5.879951 -0.7472342
## 3: -5.876568 -0.7495951 -4.7531086 -11.756519 -1.4968294
## 4: -7.822319 -1.0086052 -7.9419407 -19.578838 -2.5054346
## 5: -9.738128 -1.2885085 -11.9800028 -29.316966 -3.7939431
## 6: -11.612474 -1.5973444 -16.9215298 -40.929440 -5.3912875
In this part, each class is sorted separately by increasing ID value, and the first instance in each sorted list is chosen and plotted in 3D.
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Keep only the rows of class 1 and preview the first rows.
table_class1 <- subset(table_xyz, Class == 1)
head(table_class1)
## id Class time AccelerationX AccelerationY AccelerationZ SpeedX
## 1: 11 1 1 -0.7914472 -1.959984 -0.2490781 -0.7914472
## 2: 11 1 2 -0.7914472 -1.959984 -0.2490781 -1.5828944
## 3: 11 1 3 -0.7958727 -1.956600 -0.2514390 -2.3787671
## 4: 11 1 4 -0.8100650 -1.945750 -0.2590101 -3.1888321
## 5: 11 1 5 -0.8492300 -1.915809 -0.2799033 -4.0380621
## 6: 11 1 6 -0.9034648 -1.874347 -0.3088358 -4.9415269
## SpeedY SpeedZ LocationX LocationY LocationZ
## 1: -1.959984 -0.2490781 -0.7914472 -1.959984 -0.2490781
## 2: -3.919968 -0.4981562 -2.3743415 -5.879951 -0.7472342
## 3: -5.876568 -0.7495951 -4.7531086 -11.756519 -1.4968294
## 4: -7.822319 -1.0086052 -7.9419407 -19.578838 -2.5054346
## 5: -9.738128 -1.2885085 -11.9800028 -29.316966 -3.7939431
## 6: -11.612474 -1.5973444 -16.9215298 -40.929440 -5.3912875
# Location coordinates of instance id 11, the example plotted for class 1.
x1 <- table_xyz[id == 11, LocationX]
y1 <- table_xyz[id == 11, LocationY]
z1 <- table_xyz[id == 11, LocationZ]
# 3D line plot of the trajectory. The line attribute is spelled `reversescale`;
# the earlier spelling `reverscale` is not a plotly attribute.
fig1 <- plot_ly(
  x = x1, y = y1, z = z1, type = "scatter3d", mode = "lines",
  opacity = 1, line = list(width = 6, reversescale = FALSE)
)
fig1
# Keep only the rows of class 2 and preview the first rows.
table_class2 <- subset(table_xyz, Class == 2)
head(table_class2)
## id Class time AccelerationX AccelerationY AccelerationZ SpeedX SpeedY
## 1: 15 2 1 1.199507 1.627288 1.951568 1.199506 1.627290
## 2: 15 2 2 1.199507 1.627288 1.951568 2.399013 3.254578
## 3: 15 2 3 1.233217 1.627288 1.939750 3.632230 4.881866
## 4: 15 2 4 1.325678 1.627288 1.907334 4.957909 6.509155
## 5: 15 2 5 1.363205 1.627288 1.894178 6.321114 8.136443
## 6: 15 2 6 1.378723 1.627288 1.888738 7.699837 9.763731
## SpeedZ LocationX LocationY LocationZ
## 1: 1.951575 205944.3 -2441885 -1636043
## 2: 3.903143 205946.7 -2441882 -1636039
## 3: 5.842893 205950.3 -2441877 -1636033
## 4: 7.750227 205955.3 -2441871 -1636025
## 5: 9.644406 205961.6 -2441862 -1636016
## 6: 11.533144 205969.3 -2441853 -1636004
# Location coordinates of instance id 15, the example plotted for class 2.
x2 <- table_xyz[id == 15, LocationX]
y2 <- table_xyz[id == 15, LocationY]
z2 <- table_xyz[id == 15, LocationZ]
# 3D line plot of the trajectory. The line attribute is spelled `reversescale`;
# the earlier spelling `reverscale` is not a plotly attribute.
fig2 <- plot_ly(
  x = x2, y = y2, z = z2, type = "scatter3d", mode = "lines",
  opacity = 1, line = list(width = 6, reversescale = FALSE)
)
fig2
# Keep only the rows of class 3 and preview the first rows.
table_class3 <- subset(table_xyz, Class == 3)
head(table_class3)
## id Class time AccelerationX AccelerationY AccelerationZ SpeedX
## 1: 4 3 1 0.005184784 0.3740667 0.3094552 0.005184672
## 2: 4 3 2 0.005184784 0.3740667 0.3094552 0.010369456
## 3: 4 3 3 0.005184784 0.3740667 0.3094552 0.015554241
## 4: 4 3 4 0.005184784 0.3740667 0.3094552 0.020739025
## 5: 4 3 5 0.005184784 0.3740667 0.3094552 0.025923809
## 6: 4 3 6 0.005184784 0.3740667 0.3094552 0.031108594
## SpeedY SpeedZ LocationX LocationY LocationZ
## 1: 0.3740647 0.3094619 283445.8 -3191468 -1400778
## 2: 0.7481314 0.6189171 283445.8 -3191468 -1400777
## 3: 1.1221981 0.9283723 283445.8 -3191467 -1400776
## 4: 1.4962648 1.2378275 283445.8 -3191465 -1400775
## 5: 1.8703315 1.5472827 283445.9 -3191463 -1400774
## 6: 2.2443982 1.8567378 283445.9 -3191461 -1400772
# Location coordinates of instance id 4, the example plotted for class 3.
x3 <- table_xyz[id == 4, LocationX]
y3 <- table_xyz[id == 4, LocationY]
z3 <- table_xyz[id == 4, LocationZ]
# 3D line plot of the trajectory. The line attribute is spelled `reversescale`;
# the earlier spelling `reverscale` is not a plotly attribute.
fig3 <- plot_ly(
  x = x3, y = y3, z = z3, type = "scatter3d", mode = "lines",
  opacity = 1, line = list(width = 6, reversescale = FALSE)
)
fig3
# Keep only the rows of class 4 and preview the first rows.
table_class4 <- subset(table_xyz, Class == 4)
head(table_class4)
## id Class time AccelerationX AccelerationY AccelerationZ SpeedX SpeedY
## 1: 5 4 1 1.286198 -0.3974369 -0.4660215 1.286207 -0.3974360
## 2: 5 4 2 1.286198 -0.3974369 -0.4660215 2.572404 -0.7948729
## 3: 5 4 3 1.286198 -0.3974369 -0.4660215 3.858602 -1.1923098
## 4: 5 4 4 1.286198 -0.3974369 -0.4660215 5.144800 -1.5897468
## 5: 5 4 5 1.286198 -0.3974369 -0.4660215 6.430998 -1.9871837
## 6: 5 4 6 1.286198 -0.3974369 -0.4660215 7.717196 -2.3846206
## SpeedZ LocationX LocationY LocationZ
## 1: -0.4660289 -1972297 -3679816 -510722.7
## 2: -0.9320504 -1972295 -3679817 -510723.6
## 3: -1.3980719 -1972291 -3679818 -510725.0
## 4: -1.8640935 -1972285 -3679820 -510726.9
## 5: -2.3301150 -1972279 -3679822 -510729.2
## 6: -2.7961365 -1972271 -3679824 -510732.0
# Location coordinates of instance id 5, the example plotted for class 4.
x4 <- table_xyz[id == 5, LocationX]
y4 <- table_xyz[id == 5, LocationY]
z4 <- table_xyz[id == 5, LocationZ]
# 3D line plot of the trajectory. The line attribute is spelled `reversescale`;
# the earlier spelling `reverscale` is not a plotly attribute.
fig4 <- plot_ly(
  x = x4, y = y4, z = z4, type = "scatter3d", mode = "lines",
  opacity = 1, line = list(width = 6, reversescale = FALSE)
)
fig4
# Keep only the rows of class 5 and preview the first rows.
table_class5 <- subset(table_xyz, Class == 5)
head(table_class5)
## id Class time AccelerationX AccelerationY AccelerationZ SpeedX SpeedY
## 1: 2 5 1 1.627311 0.6666239 1.786869 1.627327 0.6666244
## 2: 2 5 2 1.627311 0.6666239 1.786869 3.254638 1.3332483
## 3: 2 5 3 1.627311 0.6666239 1.786869 4.881949 1.9998721
## 4: 2 5 4 1.627311 0.6666239 1.786869 6.509260 2.6664960
## 5: 2 5 5 1.627311 0.6666239 1.786869 8.136571 3.3331198
## 6: 2 5 6 1.627311 0.6666239 1.786869 9.763882 3.9997437
## SpeedZ LocationX LocationY LocationZ
## 1: 1.786858 446467.1 -2218692 -1138534
## 2: 3.573727 446470.4 -2218691 -1138530
## 3: 5.360596 446475.2 -2218689 -1138525
## 4: 7.147464 446481.8 -2218686 -1138517
## 5: 8.934333 446489.9 -2218683 -1138509
## 6: 10.721202 446499.7 -2218679 -1138498
# Location coordinates of instance id 2, the example plotted for class 5.
x5 <- table_xyz[id == 2, LocationX]
y5 <- table_xyz[id == 2, LocationY]
z5 <- table_xyz[id == 2, LocationZ]
# 3D line plot of the trajectory. The line attribute is spelled `reversescale`;
# the earlier spelling `reverscale` is not a plotly attribute.
fig5 <- plot_ly(
  x = x5, y = y5, z = z5, type = "scatter3d", mode = "lines",
  opacity = 1, line = list(width = 6, reversescale = FALSE)
)
fig5
# Keep only the rows of class 6 and preview the first rows.
table_class6 <- subset(table_xyz, Class == 6)
head(table_class6)
## id Class time AccelerationX AccelerationY AccelerationZ SpeedX
## 1: 1 6 1 -0.3042432 -2.119396 -1.528965 -0.3042271
## 2: 1 6 2 -0.3042432 -2.119396 -1.528965 -0.6084704
## 3: 1 6 3 -0.3042432 -2.119396 -1.528965 -0.9127136
## 4: 1 6 4 -0.3042432 -2.119396 -1.528965 -1.2169568
## 5: 1 6 5 -0.3042432 -2.119396 -1.528965 -1.5212000
## 6: 1 6 6 -0.3042432 -2.119396 -1.528965 -1.8254432
## SpeedY SpeedZ LocationX LocationY LocationZ
## 1: -2.119381 -1.528970 1257061 695244.7 1696623
## 2: -4.238777 -3.057936 1257061 695240.5 1696620
## 3: -6.358173 -4.586901 1257060 695234.1 1696615
## 4: -8.477568 -6.115866 1257059 695225.6 1696609
## 5: -10.596964 -7.644831 1257057 695215.0 1696601
## 6: -12.716360 -9.173796 1257055 695202.3 1696592
# Location coordinates of instance id 1, the example plotted for class 6.
x6 <- table_xyz[id == 1, LocationX]
y6 <- table_xyz[id == 1, LocationY]
z6 <- table_xyz[id == 1, LocationZ]
# 3D line plot of the trajectory. The line attribute is spelled `reversescale`;
# the earlier spelling `reverscale` is not a plotly attribute.
fig6 <- plot_ly(
  x = x6, y = y6, z = z6, type = "scatter3d", mode = "lines",
  opacity = 1, line = list(width = 6, reversescale = FALSE)
)
fig6
# Keep only the rows of class 7 and preview the first rows.
table_class7 <- subset(table_xyz, Class == 7)
head(table_class7)
## id Class time AccelerationX AccelerationY AccelerationZ SpeedX SpeedY
## 1: 7 7 1 1.474328 1.259287 1.237417 1.474327 1.259284
## 2: 7 7 2 1.474328 1.259287 1.237417 2.948655 2.518571
## 3: 7 7 3 1.474328 1.259287 1.237417 4.422983 3.777858
## 4: 7 7 4 1.474328 1.259287 1.237417 5.897310 5.037145
## 5: 7 7 5 1.474328 1.259287 1.237417 7.371638 6.296431
## 6: 7 7 6 1.474328 1.259287 1.237417 8.845966 7.555718
## SpeedZ LocationX LocationY LocationZ
## 1: 1.237391 1742839 -998424.2 -276864.5
## 2: 2.474809 1742842 -998421.7 -276862.1
## 3: 3.712226 1742846 -998417.9 -276858.3
## 4: 4.949643 1742852 -998412.8 -276853.4
## 5: 6.187060 1742859 -998406.5 -276847.2
## 6: 7.424478 1742868 -998399.0 -276839.8
# Location coordinates of instance id 7, the example plotted for class 7.
x7 <- table_xyz[id == 7, LocationX]
y7 <- table_xyz[id == 7, LocationY]
z7 <- table_xyz[id == 7, LocationZ]
# 3D line plot of the trajectory. The line attribute is spelled `reversescale`;
# the earlier spelling `reverscale` is not a plotly attribute.
fig7 <- plot_ly(
  x = x7, y = y7, z = z7, type = "scatter3d", mode = "lines",
  opacity = 1, line = list(width = 6, reversescale = FALSE)
)
fig7
# Keep only the rows of class 8 and preview the first rows.
table_class8 <- subset(table_xyz, Class == 8)
head(table_class8)
## id Class time AccelerationX AccelerationY AccelerationZ SpeedX SpeedY
## 1: 6 8 1 -0.4792525 -1.08008 0.6564008 -0.4792638 -1.080080
## 2: 6 8 2 -0.4792525 -1.08008 0.6564008 -0.9585162 -2.160160
## 3: 6 8 3 -0.4792525 -1.08008 0.6564008 -1.4377687 -3.240241
## 4: 6 8 4 -0.4792525 -1.08008 0.6564008 -1.9170211 -4.320321
## 5: 6 8 5 -0.4792525 -1.08008 0.6564008 -2.3962736 -5.400402
## 6: 6 8 6 -0.4792525 -1.08008 0.6564008 -2.8755261 -6.480482
## SpeedZ LocationX LocationY LocationZ
## 1: 0.6563666 1424117 -1149898 -326687.9
## 2: 1.3127674 1424116 -1149900 -326686.6
## 3: 1.9691682 1424115 -1149903 -326684.6
## 4: 2.6255690 1424113 -1149907 -326682.0
## 5: 3.2819698 1424111 -1149913 -326678.7
## 6: 3.9383707 1424108 -1149919 -326674.8
# Location coordinates of instance id 6, the example plotted for class 8.
x8 <- table_xyz[id == 6, LocationX]
y8 <- table_xyz[id == 6, LocationY]
z8 <- table_xyz[id == 6, LocationZ]
# 3D line plot of the trajectory. The line attribute is spelled `reversescale`;
# the earlier spelling `reverscale` is not a plotly attribute.
fig8 <- plot_ly(
  x = x8, y = y8, z = z8, type = "scatter3d", mode = "lines",
  opacity = 1, line = list(width = 6, reversescale = FALSE)
)
fig8
In both approaches, only the location data along the X axis (LocationX) is investigated: there are three location variables, and a single variable is enough to demonstrate each representation.
In the first approach, sample statistics of the LocationX data are calculated for each class separately.
library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
##
## isFALSE
# Per-class summary statistics of LocationX, one small table per statistic.
# Mean is calculated
mean_class <- table_xyz[, .(Mean = mean(LocationX)), by = Class]
# Median is calculated
median_class <- table_xyz[, .(Median = median(LocationX)), by = Class]
# Standard deviation is calculated
sd_class <- table_xyz[, .(StDev = sd(LocationX)), by = Class]

# Gather the three statistics side by side; all three tables are grouped by
# Class in the same way, so their rows correspond.
stats <- copy(mean_class)
stats[, `:=`(Median = median_class$Median, StDev = sd_class$StDev)]
stats
## Class Mean Median StDev
## 1: 1 117111.4 134033.4 53796.64
## 2: 2 236503.2 236055.7 18726.29
## 3: 3 -826367.7 -806507.6 651878.43
## 4: 4 -751405.8 -747953.9 688787.49
## 5: 5 950337.8 1016160.6 244720.67
## 6: 6 1499107.4 1535415.3 189190.74
## 7: 7 1561317.9 1578033.2 106669.38
## 8: 8 1585863.7 1595699.6 88141.69
Median and mean values for each class are plotted below to see whether they differ from each other significantly.
# Step plot of the per-class median of LocationX.
ggplot(data = stats, mapping = aes(x = Class, y = Median, color = Class)) +
  geom_step()
# Step plot of the per-class mean of LocationX.
ggplot(data = stats, mapping = aes(x = Class, y = Mean, color = Class)) +
  geom_step()
In the second approach, the piecewise aggregate approximation (PAA) method is applied to each class separately. To be able to plot the classes together, the segment length is calculated for each class separately so that every class yields 40 segments.
library(TSrepr)
library(repr)
# Piecewise aggregate approximation (PAA) of LocationX, one series per class.
# The second argument of repr_paa() is hard-coded per class; per the
# accompanying text these values are chosen so that every class yields 40
# segments, which lets all results share one table.

# PAA for class = 1
data_ts1 <- table_xyz[Class == 1, LocationX]
paa1 <- repr_paa(data_ts1, 961, meanC)
paa <- data.table(paa1 = paa1)
# PAA for class = 2
data_ts2 <- table_xyz[Class == 2, LocationX]
paa2 <- repr_paa(data_ts2, 851, meanC)
paa[, paa2 := paa2]
# PAA for class = 3
data_ts3 <- table_xyz[Class == 3, LocationX]
paa3 <- repr_paa(data_ts3, 835, meanC)
paa[, paa3 := paa3]
# PAA for class = 4
data_ts4 <- table_xyz[Class == 4, LocationX]
paa4 <- repr_paa(data_ts4, 867, meanC)
paa[, paa4 := paa4]
# PAA for class = 5
data_ts5 <- table_xyz[Class == 5, LocationX]
paa5 <- repr_paa(data_ts5, 1001, meanC)
paa[, paa5 := paa5]
# PAA for class = 6
data_ts6 <- table_xyz[Class == 6, LocationX]
paa6 <- repr_paa(data_ts6, 876, meanC)
paa[, paa6 := paa6]
# PAA for class = 7
data_ts7 <- table_xyz[Class == 7, LocationX]
paa7 <- repr_paa(data_ts7, 882, meanC)
paa[, paa7 := paa7]
# PAA for class = 8
data_ts8 <- table_xyz[Class == 8, LocationX]
paa8 <- repr_paa(data_ts8, 788, meanC)
paa[, paa8 := paa8]

# Segment index shared by all classes. seq_len(.N) is used instead of 1:.N so
# that an empty table gets an empty index rather than c(1, 0).
paa[, index := seq_len(.N)]
head(paa)
## paa1 paa2 paa3 paa4 paa5 paa6 paa7 paa8 index
## 1: 2256.491 210819.2 259645.61 -1939238 473062.5 1249821 1739992 1425727 1
## 2: 17247.827 212210.9 194839.51 -1871649 483405.0 1254094 1720354 1442248 2
## 3: 15206.197 213092.2 132305.17 -1801114 527704.1 1256074 1718221 1451153 3
## 4: 39295.900 215297.0 88066.26 -1741354 576372.5 1258265 1707239 1450668 4
## 5: 45533.185 211864.7 36358.79 -1681082 628118.3 1275467 1702599 1452429 5
## 6: 45801.326 211930.4 -29012.79 -1620872 626234.2 1275363 1708314 1473580 6
Then, the data table for PAA values is melted for plotting purposes. The long data table is used to plot all PAA values together below.
# Long format for plotting: one row per (index, PAA series) pair.
long_paa <- melt(
  paa,
  id.vars = "index",
  variable.name = "variable",
  value.name = "value"
)
head(long_paa)
## index variable value
## 1: 1 paa1 2256.491
## 2: 2 paa1 17247.827
## 3: 3 paa1 15206.197
## 4: 4 paa1 39295.900
## 5: 5 paa1 45533.185
## 6: 6 paa1 45801.326
# All PAA series on one set of axes, one coloured line per series.
ggplot(
  data = long_paa,
  mapping = aes(x = index, y = value, colour = variable)
) +
  geom_line()
When the plots for both approaches are examined, it is easier to differentiate the classes with the piecewise aggregate approximation method. In the sample statistics approach, the data table shows that the statistical values are different for each class; however, the plots are not clear enough to reveal any significant difference. Hence, piecewise aggregate approximation is chosen as the better representation.